loads libraries

library(here)
library(rjags)
library(ggplot2)
library(tidyverse)
library(gNanoPkg)

Initialise data

gNano.df = readData()

Specify where the results are stored relative to the gNano project and this file.

jamesDuncan = TRUE

resultsRoot = if(jamesDuncan){
    "../systematic/"
  }else{
    "../gNanoPkg/systematic/"
  }

loadResults = function(model = c(paste0("g-",0:4), paste0("ln-",0:4)),
                       summary = FALSE){

  model = match.arg(model)
  
  resultsFile = glue("{resultsRoot}{model}.Rda")
  results = load(resultsFile)
  
  if(summary){
    return(simSummary)
  }
  
  return(as.data.frame(sim.sample[[1]]))
}

Model \(g_0\) - no effects

Model $g_0$

Model \(g_0\)

Model: \[ y_{c,l,a} \sim \Gamma(r,s) \] If \[ \mathrm{E}[y_{c,l,a}]) = \log(\mu)\mbox{ and }\mathrm{Var}[y_{c,l,a}] = \sigma^2 \] then we let \[ r = \frac{\mu^2}{\sigma^2}\mbox{ and }s = \frac{\mu}{\sigma^2} \] Our prior on \(\sigma^2\) is \(inverse-\Gamma(0.001, 0.001)\), and our prior on \(\log(\mu)\) is \(N(0, 10^6)\).

First I will load up the results.

g0.df = loadResults("g-0")

I’ll do this with ggplot2 so I can teach myself something

p = g0.df %>% 
  ggplot(aes(x = tau)) +
  geom_density()
p

We want plots of shape and rate, so I will create them in the data frame first as we didn’t store them during the sampling

g0.df = g0.df %>% 
  mutate(shape = exp(log.Mu)^2 * tau) %>% 
  mutate(rate = exp(log.Mu) * tau)
p = g0.df %>% 
  ggplot(aes(x = shape))+
  geom_density()
p

p = g0.df %>% 
  ggplot(aes(x = rate))+
  geom_density()
p

rateDensity = density(g0.df$rate)
shapeDensity = density(g0.df$shape)

rateMode = rateDensity$x[which.max(rateDensity$y)]
shapeMode = shapeDensity$x[which.max(shapeDensity$y)]

density.df = data.frame(x = seq(1, 30000, 1)) %>% 
  mutate(y = dgamma(x, rate = rateMode, shape = shapeMode))

p = gNano.df %>% 
  ggplot(aes(x = obs, y = stat(density))) + 
  geom_histogram(binwidth = 400) +
  geom_line(data = density.df, aes(x, y))
p

Plot the observed vs the expected peak heights

expected = g0.df %>% 
  select(starts_with("pred")) %>% 
  summarise_all(mean) %>% 
  unlist()
  
plot.df = data.frame(observed = gNano.df$obs, expected = expected)
p = plot.df %>% 
  ggplot(aes(x = log10(observed), y = log10(expected))) +
           geom_point() +
           geom_abline(aes(intercept = 0, slope = 1)) + 
          xlim(1, 5) +
          ylim(1, 5) + 
          xlab(expression(log[10]~(observed~height))) + 
          ylab(expression(log[10]~(expected~height))) + 
          theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

Model \(g_1\) - profile effects

Model $g_1$

Model \(g_1\)

Let’s have a look at the profile effects. I like to use error bar plots

g1.df = loadResults("g-1")

This code is not elegant, and is probably stupid but then so is the tidyverse a lot of the time :-)

lb = g1.df %>% 
    select(starts_with("beta.profile")) %>% 
    summarise_all(quantile, prob = c(0.025)) %>% 
  unlist()

med = g1.df %>% 
  select(starts_with("beta.profile")) %>% 
  summarise_all(quantile, prob = c(0.5)) %>% 
  unlist()

ub = g1.df %>% 
  select(starts_with("beta.profile")) %>% 
  summarise_all(quantile, prob = c(0.975)) %>% 
  unlist()

quantiles.df = data.frame(lb = lb, med = med, ub = ub) %>% 
  mutate(profile = 1:102)

p = ggplot(data = quantiles.df, aes(x = profile, y = quantiles.df$med)) +
  geom_point(aes(col = "red"), show.legend = FALSE) + 
  geom_errorbar(aes(ymin = lb, ymax = ub)) +
  geom_hline(aes(yintercept=0)) +
  ylab(bquote(tau[c])) + 
  xlab("Profile") + 
  theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
  
p

aph = gNano.df %>% 
  group_by(prof) %>% 
  summarise(aph = mean(log(obs), na.rm = TRUE)) %>% 
  pull(aph)


meanEffect = g1.df %>% 
  select(starts_with("beta.profile")) %>% 
  summarise_all(mean, na.rm = TRUE) %>% 
  unlist()
  
plot.df = data.frame(aph = log10(exp(aph)), meanEffect = meanEffect)
p = plot.df %>% 
  ggplot(aes(x = aph, y = meanEffect)) + 
  geom_point(show.legend = FALSE) +
  geom_smooth() + 
  xlab(expression(log[10]~(aph))) + 
  ylab(bquote(mean~tau[c])) + 
  theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))

p

Plot the observed vs the expected peak heights

expected = g1.df %>% 
  select(starts_with("pred")) %>% 
  summarise_all(mean) %>% 
  unlist()
  
plot.df = data.frame(observed = gNano.df$obs, expected = expected)
p = plot.df %>% 
  ggplot(aes(x = log10(observed), y = log10(expected))) +
           geom_point() +
           geom_abline(aes(intercept = 0, slope = 1)) + 
          xlim(1, 5) +
          ylim(1, 5) + 
          xlab(expression(log[10]~(observed~height))) + 
          ylab(expression(log[10]~(expected~height))) + 
          theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

Model \(g_2\) - locus effects

Model $g_2$

Model \(g_2\)

g2.df = loadResults("g-2")

lb = g2.df %>% 
    select(starts_with("alpha.locus")) %>% 
    summarise_all(quantile, prob = c(0.025)) %>% 
  unlist()

med = g2.df %>% 
  select(starts_with("alpha.locus")) %>% 
  summarise_all(quantile, prob = c(0.5)) %>% 
  unlist()

ub = g2.df %>% 
  select(starts_with("alpha.locus")) %>% 
  summarise_all(quantile, prob = c(0.975)) %>% 
  unlist()

quantiles.df = data.frame(lb = lb, med = med, ub = ub) %>% 
  mutate(locus = 1:31)

p = ggplot(data = quantiles.df, aes(x = locus, y = quantiles.df$med)) +
  geom_point(aes(col = "red"), show.legend = FALSE) + 
  geom_errorbar(aes(ymin = lb, ymax = ub)) +
  geom_hline(aes(yintercept=0)) +
  ylab(bquote(alpha[l])) + 
  xlab("Locus") + 
  theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

Plot the observed vs the expected peak heights

expected = g2.df %>% 
  select(starts_with("pred")) %>% 
  summarise_all(mean) %>% 
  unlist()
  
plot.df = data.frame(observed = gNano.df$obs, expected = expected)
p = plot.df %>% 
  ggplot(aes(x = log10(observed), y = log10(expected))) +
           geom_point() +
           geom_abline(aes(intercept = 0, slope = 1)) + 
          xlim(1, 5) +
          ylim(1, 5) + 
          xlab(expression(log[10]~(observed~height))) + 
          ylab(expression(log[10]~(expected~height))) + 
          theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

Plot the locus effects by Average Peak Height:

aph = gNano.df %>% 
  group_by(loc) %>% 
  summarise(aph = mean(log(obs), na.rm = TRUE)) %>% 
  pull(aph)


meanEffect = g2.df %>% 
  select(starts_with("alpha.locus")) %>% 
  summarise_all(mean, na.rm = TRUE) %>% 
  unlist()
  
plot.df = data.frame(aph = log10(exp(aph)), meanEffect = meanEffect)
p = plot.df %>% 
  ggplot(aes(x = aph, y = meanEffect)) + 
  geom_point(show.legend = FALSE) +
  geom_smooth() + 
  xlab(expression(log[10]~(aph))) + 
  ylab(bquote(mean~alpha[l])) + 
  theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

Model \(g_3\) - all other effects except extra variance

Model $g_2$

Model \(g_2\)

Dye effects

g3.df = loadResults("g-3")

lb = g3.df %>% 
    select(starts_with("gamma.dye")) %>% 
    summarise_all(quantile, prob = c(0.025)) %>% 
  unlist()

med = g3.df %>% 
  select(starts_with("gamma.dye")) %>% 
  summarise_all(quantile, prob = c(0.5)) %>% 
  unlist()

ub = g3.df %>% 
  select(starts_with("gamma.dye")) %>% 
  summarise_all(quantile, prob = c(0.975)) %>% 
  unlist()

quantiles.df = data.frame(lb = lb, med = med, ub = ub) %>% 
  mutate(dye = 1:4)

p = ggplot(data = quantiles.df, aes(x = dye, y = quantiles.df$med)) +
  geom_point(aes(col = "red"), show.legend = FALSE) + 
  geom_errorbar(aes(ymin = lb, ymax = ub)) +
  geom_hline(aes(yintercept=0)) +
  ylab(bquote(delta[f])) + 
  xlab("Dye") +
  theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

Plot the dye effects by Average Peak Height - no fitted line here because with four points who cares:

aph = gNano.df %>% 
  group_by(dye) %>% 
  summarise(aph = mean(log(obs), na.rm = TRUE)) %>% 
  pull(aph)


meanEffect = g3.df %>% 
  select(starts_with("gamma.dye")) %>% 
  summarise_all(mean, na.rm = TRUE) %>% 
  unlist()
  
plot.df = data.frame(aph = log10(exp(aph)), meanEffect = meanEffect)
p = plot.df %>% 
  ggplot(aes(x = aph, y = meanEffect)) + 
  geom_point(show.legend = FALSE) +
  geom_smooth() + 
  xlab(expression(log[10]~(aph))) + 
  ylab(bquote(mean~delta[f])) + 
  theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 3.296
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.095418
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.036101
## Warning in sqrt(sum.squares/one.delta): NaNs produced
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : span too small.
## fewer data values than degrees of freedom.
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 3.296
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.095418
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.036101
## Warning in stats::qt(level/2 + 0.5, pred$df): NaNs produced

Plot the observed vs the expected peak heights

expected = g3.df %>% 
  select(starts_with("pred")) %>% 
  summarise_all(mean) %>% 
  unlist()
  
plot.df = data.frame(observed = gNano.df$obs, expected = expected)
p = plot.df %>% 
  ggplot(aes(x = log10(observed), y = log10(expected))) +
           geom_point() +
           geom_abline(aes(intercept = 0, slope = 1)) + 
          xlim(1, 5) +
          ylim(1, 5) + 
          xlab(expression(log[10]~(observed~height))) + 
          ylab(expression(log[10]~(expected~height))) + 
          theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

Model \(g_4\) - extra variance

Is the added variance being used?

g4.df = loadResults("g-4")

g4.df = g4.df %>% 
  mutate(sigma.sq0 = 1 / tau0)

p = g4.df %>% 
  ggplot(aes(x = sigma.sq0/10^6)) +
  geom_density() +
  geom_vline(xintercept = quantile(g4.df$sigma.sq0/10^6, probs=0.025)) +
  geom_vline(xintercept = quantile(g4.df$sigma.sq0/10^6, probs=0.975)) +
  xlab(bquote(sigma[0]^2~(x10^6))) + 
  theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

Plot the observed vs the expected peak heights

expected.g4 = g4.df %>% 
  select(starts_with("pred")) %>% 
  summarise_all(mean) %>% 
  unlist()
  
plot.df = data.frame(observed = gNano.df$obs, expected.g4 = expected.g4, expected.g3 = expected)
p = plot.df %>% 
  ggplot(aes(x = log10(observed), y = log10(expected.g4))) +
           geom_point() +
           geom_abline(aes(intercept = 0, slope = 1)) + 
          xlim(1, 5) +
          ylim(1, 5) + 
          xlab(expression(log[10]~(observed~height))) + 
          ylab(expression(log[10]~(expected~height))) + 
          theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

Is it doing anything?

p = plot.df %>% 
  ggplot(aes(x = log10(expected.g3), y = log10(expected.g4))) +
           geom_point() +
           geom_abline(aes(intercept = 0, slope = 1)) + 
          xlim(1, 5) +
          ylim(1, 5)
p

I think it’s doing what it is supposed to.

Let’s try and look at the expected values slightly differently. I will work with the summary stats rather than the raw data

simSummary = loadResults("g-3", summary = TRUE)

g3.quantiles = as.data.frame(t(simSummary$quantiles)) %>% 
  select(starts_with("pred"))  %>%
   rownames_to_column %>% 
   gather(var, value, -rowname) %>% 
   spread(rowname, value) %>% 
  mutate(var = as.numeric(gsub("^pred\\[([0-9]+)\\]$", "\\1", var))) %>% 
  arrange(var)

g3.stats = as.data.frame(t(simSummary$statistics)) %>% 
  select(starts_with("pred"))  %>%
   rownames_to_column %>% 
   gather(var, value, -rowname) %>% 
   spread(rowname, value) %>% 
  mutate(var = as.numeric(gsub("^pred\\[([0-9]+)\\]$", "\\1", var))) %>% 
  arrange(var)



simSummary = loadResults("g-4", summary = TRUE)

g4.quantiles = as.data.frame(t(simSummary$quantiles)) %>% 
    select(starts_with("pred"))  %>%
   rownames_to_column %>% 
   gather(var, value, -rowname) %>% 
   spread(rowname, value)  %>% 
  mutate(var = as.numeric(gsub("^pred\\[([0-9]+)\\]$", "\\1", var))) %>% 
  arrange(var)

plot.df = bind_rows(g3.quantiles, g4.quantiles) %>% 
  mutate(model = rep(c("g3", "g4"), rep(nrow(g3.quantiles), 2))) %>% 
  mutate(obs = rep(gNano.df$obs, 2)) %>% 
  rename(med = `50%`, lb = `2.5%`, ub = `97.5%`) %>% 
  select(obs, model, lb, med, ub) %>% 
  arrange(obs)


p = plot.df %>% 
  ggplot(aes(x = log10(obs), y = log10(med))) +
  facet_grid(vars(model)) +
    geom_abline(aes(intercept = 0, slope = 1)) +
    xlim(1, 5) +
    ylim(1, 5) +
  geom_point(show.legend = FALSE) + 
  geom_smooth() +
  geom_line(aes(y = log10(lb)), col = "green") + geom_line(aes(y = log10(ub)), col = "red")
p
## Warning: Removed 35 rows containing non-finite values (stat_smooth).
## Warning: Removed 35 rows containing missing values (geom_point).
## Warning: Removed 26 rows containing missing values (geom_path).

Now we move on the the log normal models

Model \(ln_0\) - no effects

Tau

ln0.df = loadResults("ln-0")

p = ln0.df %>% 
  ggplot(aes(x = tau)) +
  geom_density()
p

Mu

p = ln0.df %>% 
  ggplot(aes(x = Mu)) +
  geom_density()
p

Comparing expected and observed peak heights

gNano.df = readData()
expected = ln0.df %>% 
  select(starts_with("pred")) %>% 
  summarise_all(mean) %>% 
  unlist()
  
plot.df = data.frame(observed = gNano.df$obs, expected = expected)
p = plot.df %>% 
  ggplot(aes(x = log10(observed), y = log10(exp(expected)))) +
           geom_point() +
           geom_abline(aes(intercept = 0, slope = 1)) + 
          xlim(1, 5) +
          ylim(1, 5) + 
          xlab(expression(log[10]~(observed~height))) + 
          ylab(expression(log[10]~(expected~height))) + 
          theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

Model \(ln_1\) - profile effects

Load the data

ln1.df = loadResults("ln-1")

Showing the per profile effects

lb = ln1.df %>% 
    select(starts_with("beta.profile")) %>% 
    summarise_all(quantile, prob = c(0.025)) %>% 
  unlist()

med = ln1.df %>% 
  select(starts_with("beta.profile")) %>% 
  summarise_all(quantile, prob = c(0.5)) %>% 
  unlist()

ub = ln1.df %>% 
  select(starts_with("beta.profile")) %>% 
  summarise_all(quantile, prob = c(0.975)) %>% 
  unlist()

quantiles.df = data.frame(lb = lb, med = med, ub = ub) %>% 
  mutate(profile = 1:102)

p = ggplot(data = quantiles.df, aes(x = profile, y = quantiles.df$med)) +
  geom_point(aes(col = "red"), show.legend = FALSE) + 
  geom_errorbar(aes(ymin = lb, ymax = ub)) +
  geom_hline(aes(yintercept=0)) +
  ylab(bquote(tau[c])) + 
  xlab("Profile") + 
  theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
  
p

Now showing the relationship between profile aph and profile effects from model

aph = gNano.df %>% 
  group_by(prof) %>% 
  summarise(aph = mean(log(obs), na.rm = TRUE)) %>% 
  pull(aph)


meanEffect = ln1.df %>% 
  select(starts_with("beta.profile")) %>% 
  summarise_all(mean, na.rm = TRUE) %>% 
  unlist()
  
plot.df = data.frame(aph = log10(exp(aph)), meanEffect = meanEffect)
p = plot.df %>% 
  ggplot(aes(x = aph, y = meanEffect)) + 
  geom_point(show.legend = FALSE) +
  geom_smooth() +
  xlab(expression(log[10]~(aph))) + 
  ylab(bquote(mean~tau[c])) + 
  theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

Plotting observed vs expected peak heights

expected = ln1.df %>% 
  select(starts_with("pred")) %>% 
  summarise_all(mean) %>% 
  unlist()
  
plot.df = data.frame(observed = gNano.df$obs, expected = expected)
p = plot.df %>% 
  ggplot(aes(x = log10(observed), y = log10(exp(expected)))) +
           geom_point() +
           geom_abline(aes(intercept = 0, slope = 1)) + 
          xlim(1, 5) +
          ylim(1, 5) + 
          xlab(expression(log[10]~(observed~height))) + 
          ylab(expression(log[10]~(expected~height))) + 
          theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

Model \(ln_2\) - locus effects

ln2.df = loadResults("ln-2")

lb = ln2.df %>% 
    select(starts_with("alpha.locus")) %>% 
    summarise_all(quantile, prob = c(0.025)) %>% 
  unlist()

med = ln2.df %>% 
  select(starts_with("alpha.locus")) %>% 
  summarise_all(quantile, prob = c(0.5)) %>% 
  unlist()

ub = ln2.df %>% 
  select(starts_with("alpha.locus")) %>% 
  summarise_all(quantile, prob = c(0.975)) %>% 
  unlist()

quantiles.df = data.frame(lb = lb, med = med, ub = ub) %>% 
  mutate(locus = 1:31)

p = ggplot(data = quantiles.df, aes(x = locus, y = quantiles.df$med)) +
  geom_point(aes(col = "red"), show.legend = FALSE) + 
  geom_errorbar(aes(ymin = lb, ymax = ub)) +
  geom_hline(aes(yintercept=0)) +
  ylab(bquote(alpha[l])) + 
  xlab("Locus") + 
  theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

Plot the observed vs the expected peak heights

expected = ln2.df %>% 
  select(starts_with("pred")) %>% 
  summarise_all(mean) %>% 
  unlist()
  
plot.df = data.frame(observed = gNano.df$obs, expected = expected)
p = plot.df %>% 
  ggplot(aes(x = log10(observed), y = log10(exp(expected)))) +
           geom_point() +
           geom_abline(aes(intercept = 0, slope = 1)) + 
          xlim(1, 5) +
          ylim(1, 5) +
          xlab(expression(log[10]~(observed~height))) + 
          ylab(expression(log[10]~(expected~height))) + 
          theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

Plot the locus effects by Average Peak Height:

aph = gNano.df %>% 
  group_by(loc) %>% 
  summarise(aph = mean(log(obs), na.rm = TRUE)) %>% 
  pull(aph)


meanEffect = ln2.df %>% 
  select(starts_with("alpha.locus")) %>% 
  summarise_all(mean, na.rm = TRUE) %>% 
  unlist()
  
plot.df = data.frame(aph = log10(exp(aph)), meanEffect = meanEffect)
p = plot.df %>% 
  ggplot(aes(x = aph, y = meanEffect)) + 
  geom_point(show.legend = FALSE) +
  geom_smooth() +
  xlab(expression(log[10]~(aph))) + 
  ylab(bquote(mean~alpha[l])) + 
  theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

Model \(ln_3\) - all other effects except extra variance

Dye effects

ln3.df = loadResults("ln-3")


lb = ln3.df %>% 
    select(starts_with("gamma.dye")) %>% 
    summarise_all(quantile, prob = c(0.025)) %>% 
  unlist()

med = ln3.df %>% 
  select(starts_with("gamma.dye")) %>% 
  summarise_all(quantile, prob = c(0.5)) %>% 
  unlist()

ub = ln3.df %>% 
  select(starts_with("gamma.dye")) %>% 
  summarise_all(quantile, prob = c(0.975)) %>% 
  unlist()

quantiles.df = data.frame(lb = lb, med = med, ub = ub) %>% 
  mutate(dye = 1:4)

p = ggplot(data = quantiles.df, aes(x = dye, y = quantiles.df$med)) +
  geom_point(aes(col = "red"), show.legend = FALSE) + 
  geom_errorbar(aes(ymin = lb, ymax = ub)) +
  geom_hline(aes(yintercept=0)) +
  ylab(bquote(delta[f])) + 
  xlab("Dye") +
  theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

Plot the dye effects by Average Peak Height - no fitted line here because with four points who cares:

aph = gNano.df %>% 
  group_by(dye) %>% 
  summarise(aph = mean(log(obs), na.rm = TRUE)) %>% 
  pull(aph)


meanEffect = ln3.df %>% 
  select(starts_with("gamma.dye")) %>% 
  summarise_all(mean, na.rm = TRUE) %>% 
  unlist()
  
plot.df = data.frame(aph = log10(exp(aph)), meanEffect = meanEffect)
p = plot.df %>% 
  ggplot(aes(x = aph, y = meanEffect)) + 
  geom_point(show.legend = FALSE) +
  geom_smooth() +
  xlab(expression(log[10]~(aph))) + 
  ylab(bquote(mean~delta[f])) + 
  theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 3.296
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.095418
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.036101
## Warning in sqrt(sum.squares/one.delta): NaNs produced
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : span too small.
## fewer data values than degrees of freedom.
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 3.296
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.095418
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.036101
## Warning in stats::qt(level/2 + 0.5, pred$df): NaNs produced

Plot the observed vs the expected peak heights

expected = ln3.df %>% 
  select(starts_with("pred")) %>% 
  summarise_all(mean) %>% 
  unlist()
  
plot.df = data.frame(observed = gNano.df$obs, expected = expected)
p = plot.df %>% 
  ggplot(aes(x = log10(observed), y = log10(exp(expected)))) +
           geom_point() +
           geom_abline(aes(intercept = 0, slope = 1)) + 
          xlim(1, 5) +
          ylim(1, 5) +
          xlab(expression(log[10]~(observed~height))) + 
          ylab(expression(log[10]~(expected~height))) + 
          theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

Model \(ln_4\) - extra variance

Is the added variance being used?

ln4.df = loadResults("ln-4")

ln4.df = ln4.df %>% 
  mutate(sigma.sq0 = 1 / tau0)

p = ln4.df %>% 
  ggplot(aes(x = sigma.sq0)) +
  geom_density() +
  geom_vline(xintercept = quantile(ln4.df$sigma.sq0, probs=0.025)) +
  geom_vline(xintercept = quantile(ln4.df$sigma.sq0, probs=0.975)) +
  xlab(bquote(sigma[0]^2)) + 
  theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

Plot the observed vs the expected peak heights

expected.ln4 = ln4.df %>% 
  select(starts_with("pred")) %>% 
  summarise_all(mean) %>% 
  unlist()
  
plot.df = data.frame(observed = gNano.df$obs, expected.ln4 = expected.ln4)
p = plot.df %>% 
  ggplot(aes(x = log10(observed), y = log10(exp(expected.ln4)))) +
           geom_point() +
           geom_abline(aes(intercept = 0, slope = 1)) + 
          xlim(1, 5) +
          ylim(1, 5) +
          xlab(expression(log[10]~(observed~height))) + 
          ylab(expression(log[10]~(expected~height))) + 
          theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"))
p

compare the dlog(likelihoods) for the log-normal model 4 vs the gamma model 4

# You need the latest version of tidyr to make this work. 
# The commands to do this are on the next line
# devtools::install_github("tidyverse/tidyr")
library(tidyr)

#loads the gamma model 4
g4.df = loadResults("g-4")

sr.df = g4.df %>% 
  select(c(starts_with("shape"), starts_with("rate"))) 

sr.df = sr.df %>% 
  pivot_longer(cols = everything(), names_to = "parameter")
  
nObs = length(gNano.df$obs)

sr.df = sr.df %>% 
  mutate(obs = as.numeric(gsub("^(shape|rate)([.]|\\[)([0-9]+)([.]|\\])$", "\\3", parameter)))

sr.df = sr.df %>% 
  mutate(parameter = gsub("^(shape|rate)([.]|\\[)([0-9]+)([.]|\\])$", "\\1", parameter))

shape.df = sr.df %>% 
  filter(parameter == "shape")

rate.df = sr.df %>% 
  filter(parameter == "rate")

sr.df = data.frame(
  obs = shape.df$obs,
  y = rep(gNano.df$obs, 1000),
  shape = shape.df$value,
  rate = rate.df$value
)

sr.df = sr.df %>% 
  mutate(rep = rep(1:1000, rep(nObs, 1000)))

g4.ll = sr.df %>% 
  group_by(rep) %>% 
  summarise(ll = sum(dgamma(y, shape, rate, log = TRUE)))

p = g4.ll %>% 
  ggplot(aes(x = ll)) + geom_histogram()
p

#loads the log-normal model 4
ln4.df = loadResults("ln-4")

ms.df = ln4.df %>% 
  select(c(starts_with("mu", ignore.case = FALSE), matches("^tau([.]|\\[)[0-9]+([.]|\\])$"))) 

ms.df = ms.df %>% 
  pivot_longer(cols = everything(), names_to = "parameter")
  
nObs = length(gNano.df$obs)

ms.df = ms.df %>% 
  mutate(obs = as.numeric(gsub("^(mu|tau)([.]|\\[)([0-9]+)([.]|\\])$", "\\3", parameter)))

ms.df = ms.df %>% 
  mutate(parameter = gsub("^(mu|tau)([.]|\\[)([0-9]+)([.]|\\])$", "\\1", parameter))

mu.df = ms.df %>% 
  filter(parameter == "mu")

sigma.df = ms.df %>% 
  filter(parameter == "tau") %>% 
  mutate(sigma = 1 / sqrt(value)) %>% 
  select(-value)

ms.df = data.frame(
  obs = mu.df$obs,
  y = rep(gNano.df$obs, 1000),
  mu = mu.df$value,
  sigma = sigma.df$sigma
)

ms.df = ms.df %>% 
  mutate(rep = rep(1:1000, rep(nObs, 1000)))

ln4.ll = ms.df %>% 
  group_by(rep) %>% 
  summarise(ll = sum(dlnorm(y, mu, sigma, log = TRUE)))

plot.df = left_join(g4.ll, ln4.ll, by = c("rep" = "rep")) %>% 
  rename(g4 = ll.x, ln4 = ll.y) 
# %>% 
#   pivot_longer(cols = c(g4, ln4), names_to = "model")

#plots densities
p = ggplot(plot.df) 
p = p + geom_density(aes(x = g4), col="blue")
p = p + geom_density(aes(x = ln4), col="red")
p = p + xlab(expression(log[10]~(likelihood)))
p = p + ylab("density")
p = p + theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"), legend.position = "right", legend.justification = "top-right")
p = p + scale_colour_manual(name="distribution", breaks =c("gamma", "log-normal"), values = c("blue", "red"))
p = p + scale_x_continuous(limits=c(-35600, -34600))
p

comparing the g3 model and the g4 model

# You need the latest version of tidyr to make this work. 
# The commands to do this are on the next line
# devtools::install_github("tidyverse/tidyr")
library(tidyr)

#loads the gamma model 4
g4.df = loadResults("g-4")

sr.df = g4.df %>% 
  select(c(starts_with("shape"), starts_with("rate"))) 

sr.df = sr.df %>% 
  pivot_longer(cols = everything(), names_to = "parameter")
  
nObs = length(gNano.df$obs)

sr.df = sr.df %>% 
  mutate(obs = as.numeric(gsub("^(shape|rate)([.]|\\[)([0-9]+)([.]|\\])$", "\\3", parameter)))

sr.df = sr.df %>% 
  mutate(parameter = gsub("^(shape|rate)([.]|\\[)([0-9]+)([.]|\\])$", "\\1", parameter))

shape.df = sr.df %>% 
  filter(parameter == "shape")

rate.df = sr.df %>% 
  filter(parameter == "rate")

sr.df = data.frame(
  obs = shape.df$obs,
  y = rep(gNano.df$obs, 1000),
  shape = shape.df$value,
  rate = rate.df$value
)

sr.df = sr.df %>% 
  mutate(rep = rep(1:1000, rep(nObs, 1000)))

g4.ll = sr.df %>% 
  group_by(rep) %>% 
  summarise(ll = sum(dgamma(y, shape, rate, log = TRUE)))

p = g4.ll %>% 
  ggplot(aes(x = ll)) + geom_histogram()
p

g3.df = loadResults("g-3")


sr.df = g3.df %>% 
  select(c(starts_with("shape"), starts_with("rate"))) 

sr.df = sr.df %>% 
  pivot_longer(cols = everything(), names_to = "parameter")
  
nObs = length(gNano.df$obs)

sr.df = sr.df %>% 
  mutate(obs = as.numeric(gsub("^(shape|rate)([.]|\\[)([0-9]+)([.]|\\])$", "\\2", parameter)))
## Warning: NAs introduced by coercion
sr.df = sr.df %>% 
  mutate(parameter = gsub("^(shape|rate)([.]|\\[)([0-9]+)([.]|\\])$", "\\1", parameter))

shape.df = sr.df %>% 
  filter(parameter == "shape")

rate.df = sr.df %>% 
  filter(parameter == "rate")

sr.df = data.frame(
  obs = shape.df$obs,
  y = rep(gNano.df$obs, 1000),
  shape = shape.df$value,
  rate = rate.df$value
)

sr.df = sr.df %>% 
  mutate(rep = rep(1:1000, rep(nObs, 1000)))

g3.ll = sr.df %>% 
  group_by(rep) %>% 
  summarise(ll = sum(dgamma(y, shape, rate, log = TRUE)))

p = g3.ll %>% 
  ggplot(aes(x = ll)) + geom_histogram()
p

plot.df = left_join(g4.ll, g3.ll, by = c("rep" = "rep")) %>% 
  rename(g4 = ll.x, g3 = ll.y) 
# %>% 
#   pivot_longer(cols = c(g4, ln4), names_to = "model")

#plots densities
p = ggplot(plot.df) 
p = p + geom_density(aes(x = g4), col="blue")
p = p + geom_density(aes(x = g3), col="red")
p = p + xlab(expression(log[10]~(likelihood)))
p = p + ylab("density")
p = p + theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"), legend.position = "right", legend.justification = "top-right")
p = p + scale_colour_manual(name="distribution", breaks =c("gamma4", "gamma3"), values = c("blue", "red"))
#p = p + scale_x_continuous(limits=c(-35600, -34600))
p

compare the ln3 and ln4 models

# You need the latest version of tidyr to make this work. 
# The commands to do this are on the next line
# devtools::install_github("tidyverse/tidyr")
library(tidyr)

#loads the log-normal model 3
ln3.df = loadResults("ln-3")

ms.df = ln3.df %>% 
  select(c(starts_with("mu", ignore.case = FALSE), matches("^tau$"))) 

ms.df = ms.df %>% 
  pivot_longer(cols = everything(), names_to = "parameter")

sigma.df = ms.df %>% 
  filter(parameter == "tau") %>% 
  mutate(sigma = 1 / sqrt(value)) %>% 
  select(-value)

nObs = length(gNano.df$obs)

mu.df = ms.df %>% 
  filter(grepl("^mu", parameter)) %>% 
  mutate(obs = as.numeric(gsub("^mu([.]|\\[)([0-9]+)([.]|\\])$", "\\2", parameter))) %>% 
  select(-parameter)


ms.df = data.frame(
  obs = mu.df$obs,
  y = rep(gNano.df$obs, 1000),
  mu = mu.df$value,
  sigma = rep(sigma.df$sigma, rep(nObs, 1000))
)

ms.df = ms.df %>% 
  mutate(rep = rep(1:1000, rep(nObs, 1000)))

ln3.ll = ms.df %>% 
  group_by(rep) %>% 
  summarise(ll = sum(dlnorm(y, mu, sigma, log = TRUE)))





ln4.df = loadResults("ln-4")

ms.df = ln4.df %>% 
  select(c(starts_with("mu", ignore.case = FALSE), matches("^tau([.]|\\[)[0-9]+([.]|\\])$"))) 

ms.df = ms.df %>% 
  pivot_longer(cols = everything(), names_to = "parameter")
  
nObs = length(gNano.df$obs)

ms.df = ms.df %>% 
  mutate(obs = as.numeric(gsub("^(mu|tau)([.]|\\[)([0-9]+)([.]|\\])$", "\\3", parameter)))

ms.df = ms.df %>% 
  mutate(parameter = gsub("^(mu|tau)([.]|\\[)([0-9]+)([.]|\\])$", "\\1", parameter))

mu.df = ms.df %>% 
  filter(parameter == "mu")

sigma.df = ms.df %>% 
  filter(parameter == "tau") %>% 
  mutate(sigma = 1 / sqrt(value)) %>% 
  select(-value)

ms.df = data.frame(
  obs = mu.df$obs,
  y = rep(gNano.df$obs, 1000),
  mu = mu.df$value,
  sigma = sigma.df$sigma
)

ms.df = ms.df %>% 
  mutate(rep = rep(1:1000, rep(nObs, 1000)))

ln4.ll = ms.df %>% 
  group_by(rep) %>% 
  summarise(ll = sum(dlnorm(y, mu, sigma, log = TRUE)))

plot.df = left_join(ln3.ll, ln4.ll, by = c("rep" = "rep")) %>% 
  rename(ln3 = ll.x, ln4 = ll.y) 
# %>% 
#   pivot_longer(cols = c(g4, ln4), names_to = "model")

#plots densities
p = ggplot(plot.df) 
p = p + geom_density(aes(x = ln4), col="blue")
p = p + geom_density(aes(x = ln3), col="red")
p = p + xlab(expression(log[10]~(likelihood)))
p = p + ylab("density")
p = p + theme(text = element_text(size = 20), aspect.ratio = 1, plot.background = element_rect(color="black"), legend.position = "right", legend.justification = "top-right")
p = p + scale_colour_manual(name="distribution", breaks =c("ln4", "ln3"), values = c("blue", "red"))
# p = p + scale_x_continuous(limits=c(-35600, -34600))
p